3  Results

Code
library(ggplot2)
library(plotly)
library(tidyverse)
library(dplyr)
library(parcoords)
library(d3r)
library(GGally)
library(forcats)
library(redav)
Code
# Load the season-level batting data.
df <- read.csv("Data_Cricket_Data_by_season_all.csv")

# The regexes below assume Player looks like "Name (TEAM)": the text inside the
# parentheses goes to Country and the parenthesised part is removed from Player.
df$Country <- sub(".*\\((.*?)\\).*", "\\1", df$Player)
df$Player <- sub("\\(.*?\\)", "", df$Player)

# A "*" in HS is recorded as NotOut = "Yes"/"No", then stripped from HS.
df$NotOut <- ifelse(grepl("\\*", df$HS), "Yes", "No")
df$HS <- gsub("\\*", "", df$HS)

# Start year: everything before the first "/" in Season.
df$start_year <- sub("/.*", "", df$Season)

if (any(grepl("/", df$Season))) {
  # End year: first two characters of Season followed by the digits after "/".
  # Rows without a "/" end up with six characters here and are trimmed below.
  df$end_year <- paste0(substr(df$Season, 1, 2),
                        sub(".*?/(\\d+)$", "\\1", df$Season))
} else {
  # No season contains "/": start and end years are the same.
  df$end_year <- df$start_year
}

# A season such as "1999/00" produces "1900" above; move it to "2000".
df$end_year <- gsub("1900", "2000", df$end_year)

# Six-character values keep only their last four characters.
six_chars <- grepl("^.{6}$", df$end_year)
df$end_year[six_chars] <- substr(df$end_year[six_chars], 3, 6)

# start_year and end_year stay character; later code wraps them in
# as.numeric() where numbers are needed.

df$Season <- NULL

df <- na.omit(df)

# Replace cells that consist solely of "-" with "0". The pattern is anchored so
# that hyphens inside other values (for example hyphenated player names) are
# left intact. As before, every column comes out as character.
df[] <- lapply(df, function(x) sub("^\\s*-\\s*$", "0", x))

df_Ind <- df[df$Country %in% c("IND", "ICC/IND"), ]

# Total runs per start year for the rows selected above.
df1 <- df_Ind %>%
  group_by(start_year) %>%
  summarise(total_runs = sum(as.numeric(RunsDescending)))

odi_stats_ind <- read.csv("Data_India_ODI_stats.csv")

# NOTE(review): this division is positional - it assumes odi_stats_ind has one
# row per start_year in the same order as df1. Confirm against the CSV.
df1$ave_runs <- df1$total_runs / odi_stats_ind$PLD
Code
###################Plot 1##################################
# Line-and-point chart of ave_runs against the numeric start year,
# with an x-axis tick every two years from 1970 to 2025.
ggplot(df1) +
  aes(x = as.numeric(start_year), y = ave_runs) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(from = 1970, to = 2025, by = 2)) +
  xlab("Year") +
  ylab("Average Runs (by Matches Played)")

Code
###################Plot 1##################################

3.1 Observation

Code
# Positional renames: column 5 -> "Runs", column 15 -> "start", column 16 -> "end".
# NOTE(review): presumably these are RunsDescending, start_year and end_year -
# confirm the column order of df before relying on the indices.
names(df)[c(5, 15, 16)] <- c("Runs", "start", "end")

# Left-closed ten-year bins from 1970 to 2030 (the last bin is closed on both
# sides); dig.lab = 4 keeps the years unabbreviated in the level labels.
decade <- cut(
  as.numeric(df$start),
  breaks = seq(from = 1970, to = 2030, by = 10),
  right = FALSE,
  include.lowest = TRUE,
  dig.lab = 4
)
df$Decade <- decade

# Per player and country within [2010,2020): total runs, mean SR, mean Ave.
filtered_10_20 <- summarise(
  group_by(filter(df, Decade == "[2010,2020)"), Player, Country),
  Runs = sum(as.numeric(Runs)),
  decade_avg_str = mean(as.numeric(SR)),
  decade_avg_ba = mean(as.numeric(Ave))
)

# Highest run totals first.
filtered_10_20 <- filtered_10_20[
  order(filtered_10_20[["Runs"]], decreasing = TRUE),
]
Code
library(plotly)

## Add ggplotly later on!

# Parallel-coordinates plot of columns 3 to 5 for the first 20 rows of
# filtered_10_20, coloured by column 1.
# head() is used instead of [1:20, ] so that a table with fewer than 20 rows
# is plotted as-is rather than being indexed past its end.
ggparcoord(head(filtered_10_20, 20),
           columns = 3:5,
           groupColumn = 1, showPoints = TRUE)

3.2 ICC 2011 World Cup (Men’s) Final

Code
# Load the 2011 final data and drop the match_id column.
icc_2011 <- read.csv("Data_ICC_2011_F.csv")
icc_2011[["match_id"]] <- NULL

# Blank strings in the dismissal columns become NA.
icc_2011$wicket_type <- replace(icc_2011$wicket_type,
                                icc_2011$wicket_type == "", NA)
icc_2011$player_dismissed <- replace(icc_2011$player_dismissed,
                                     icc_2011$player_dismissed == "", NA)

# `over` is the whole-number part of `ball`.
icc_2011$over <- floor(icc_2011[["ball"]])

# Left-closed bins of width 10 across 0 to 50; the last bin also includes 50.
icc_2011$binned_overs <- cut(icc_2011$over,
                             breaks = seq(from = 0, to = 50, by = 10),
                             right = FALSE,
                             include.lowest = TRUE)
over_intervals <- icc_2011$binned_overs

# Per over bin and innings: total runs (off the bat plus extras) and the
# number of rows with a non-NA wicket_type.
df_2011_p1 <- icc_2011 %>%
  group_by(binned_overs, innings) %>%
  summarize(runs = sum(runs_off_bat) + sum(extras),
            n_wickets = sum(!is.na(wicket_type)))

################ Plot Type 1 #####################

# Dodged bars of runs per over bin, one bar per innings. `innings` is mapped
# directly; the earlier pivot_longer(innings) only renamed it to `value`.
# Point size and text labels are taken straight from df_2011_p1$n_wickets, so
# they rely on the plotted rows being in the same order as that table.
df_2011_p1 %>%
  ggplot(aes(x = binned_overs, y = runs, fill = factor(innings))) +
  geom_col(position = "dodge") +
  scale_x_discrete(breaks = c("[0,10)", "[10,20)", "[20,30)", "[30,40)", "[40,50]"),
                   labels = c("1-10", "11-20", "21-30", "31-40", "41-50")) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  labs(title = "",
       x = "Overs",
       y = "Runs Scored",
       fill = "Innings") +
  geom_point(aes(y = runs),
             position = position_dodge(width = 0.85),
             size = df_2011_p1$n_wickets * 5, alpha = 0.2) +
  geom_text(vjust = 0, hjust = 0.64, label = df_2011_p1$n_wickets,
            position = position_dodge(width = 0.80),
            size = 3)

Code
################ Plot Type 1 #####################

################ Plot Type 2 #####################

# Rows of the 2011 final where innings is 2.
inning_2 <- icc_2011[icc_2011[["innings"]] == 2, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2011_p2a <- summarise(
  group_by(inning_2, binned_overs, striker, bowler),
  runs_scored = sum(runs_off_bat)
)

# One facet per striker, one bar per bowler, filled by over bin.
ggplot(df_2011_p2a) +
  aes(x = runs_scored, y = bowler, fill = binned_overs) +
  geom_col() +
  facet_wrap(vars(striker)) +
  labs(x = "Runs Scored",
       y = "Bowler",
       title = "Runs scored by Indian Batsmen in front of Sri-Lankan Bowlers")

Code
# Plot 2
# Rows of the 2011 final where innings is 1.
inning_1 <- icc_2011[icc_2011[["innings"]] == 1, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2011_p2b <- summarise(
  group_by(inning_1, binned_overs, striker, bowler),
  runs_scored = sum(runs_off_bat)
)

# One facet per striker, one bar per bowler, filled by over bin.
ggplot(df_2011_p2b) +
  aes(x = runs_scored, y = bowler, fill = binned_overs) +
  geom_col() +
  facet_wrap(vars(striker)) +
  labs(x = "Runs Scored",
       y = "Bowler",
       title = "Runs scored by Sri-Lankan Batsmen in front of Indian Bowlers")

Code
################ Plot Type 2 #####################

3.3 ICC 2015 World Cup (Men’s) Final

Code
# Load the 2015 final data and drop the match_id column.
icc_2015 <- read.csv("Data_ICC_2015_F.csv")
icc_2015[["match_id"]] <- NULL

# Blank strings in the dismissal columns become NA.
icc_2015$wicket_type <- replace(icc_2015$wicket_type,
                                icc_2015$wicket_type == "", NA)
icc_2015$player_dismissed <- replace(icc_2015$player_dismissed,
                                     icc_2015$player_dismissed == "", NA)

# `over` is the whole-number part of `ball`.
icc_2015$over <- floor(icc_2015[["ball"]])

# Left-closed bins of width 10 across 0 to 50; the last bin also includes 50.
icc_2015$binned_overs <- cut(icc_2015$over,
                             breaks = seq(from = 0, to = 50, by = 10),
                             right = FALSE,
                             include.lowest = TRUE)
over_intervals <- icc_2015$binned_overs

# Per over bin and innings: total runs (off the bat plus extras) and the
# number of rows with a non-NA wicket_type.
df_2015_p1 <- icc_2015 %>%
  group_by(binned_overs, innings) %>%
  summarize(runs = sum(runs_off_bat) + sum(extras),
            n_wickets = sum(!is.na(wicket_type)))

# Dodged bars of runs per over bin, one bar per innings. `innings` is mapped
# directly; the earlier pivot_longer(innings) only renamed it to `value`.
# Point size and text labels are taken straight from df_2015_p1$n_wickets, so
# they rely on the plotted rows being in the same order as that table.
df_2015_p1 %>%
  ggplot(aes(x = binned_overs, y = runs, fill = factor(innings))) +
  geom_col(position = "dodge") +
  scale_x_discrete(breaks = c("[0,10)", "[10,20)", "[20,30)", "[30,40)", "[40,50]"),
                   labels = c("1-10", "11-20", "21-30", "31-40", "41-50")) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  labs(title = "",
       x = "Overs",
       y = "Runs Scored",
       fill = "Innings") +
  geom_point(aes(y = runs),
             position = position_dodge(width = 0.85),
             size = df_2015_p1$n_wickets * 5, alpha = 0.2) +
  geom_text(vjust = 0, hjust = 0.64, label = df_2015_p1$n_wickets,
            position = position_dodge(width = 0.80),
            size = 3)

Code
################ Plot Type 1 #####################

################ Plot Type 2 #####################

# Plot 1
# Rows of the 2015 final where innings is 2.
inning_2 <- icc_2015[icc_2015[["innings"]] == 2, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2015_p2a <- summarise(
  group_by(inning_2, binned_overs, striker, bowler),
  runs_scored = sum(runs_off_bat)
)

# One facet per striker, one bar per bowler, filled by over bin.
ggplot(df_2015_p2a) +
  aes(x = runs_scored, y = bowler, fill = binned_overs) +
  geom_col() +
  facet_wrap(vars(striker)) +
  labs(x = "Runs Scored",
       y = "Bowler",
       title = "Runs scored by Australian Batsmen in front of New Zealand Bowlers")

Code
# Plot 2
# Rows of the 2015 final where innings is 1.
inning_1 <- icc_2015[icc_2015[["innings"]] == 1, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2015_p2b <- summarise(
  group_by(inning_1, binned_overs, striker, bowler),
  runs_scored = sum(runs_off_bat)
)

# One facet per striker, one bar per bowler, filled by over bin.
ggplot(df_2015_p2b) +
  aes(x = runs_scored, y = bowler, fill = binned_overs) +
  geom_col() +
  facet_wrap(vars(striker)) +
  labs(x = "Runs Scored",
       y = "Bowler",
       title = "Runs scored by New Zealand Batsmen in front of Australian Bowlers")

Code
################ Plot Type 2 #####################
####################### ICC 2015 Final ######################

3.4 ICC 2019 World Cup (Men’s) Final

Code
####################### ICC 2019 Final ######################
####### Preprocessing #######
# Load the 2019 final data and drop the match_id column.
icc_2019 <- read.csv("Data_ICC_2019_F.csv")
icc_2019[["match_id"]] <- NULL

# Blank strings in the dismissal columns become NA.
icc_2019$wicket_type <- replace(icc_2019$wicket_type,
                                icc_2019$wicket_type == "", NA)
icc_2019$player_dismissed <- replace(icc_2019$player_dismissed,
                                     icc_2019$player_dismissed == "", NA)

# `over` is the whole-number part of `ball`.
icc_2019$over <- floor(icc_2019[["ball"]])

# Left-closed bins of width 10 across 0 to 50; the last bin also includes 50.
icc_2019$binned_overs <- cut(icc_2019$over,
                             breaks = seq(from = 0, to = 50, by = 10),
                             right = FALSE,
                             include.lowest = TRUE)
over_intervals <- icc_2019$binned_overs
####### Preprocessing #######

# Note: these NA values do not indicate missing data; they are expected by construction (blank dismissal fields recoded to NA above).

################ Plot Type 1 #####################
# Per over bin and innings: total runs (off the bat plus extras) and the
# number of rows with a non-NA wicket_type.
# NOTE(review): if the CSV also holds super-over deliveries under additional
# innings numbers, they will show up as extra bars in the first bin - confirm.
df_2019_p1 <- icc_2019 %>%
  group_by(binned_overs, innings) %>%
  summarize(runs = sum(runs_off_bat) + sum(extras),
            n_wickets = sum(!is.na(wicket_type)))

# Dodged bars of runs per over bin, one bar per innings. `innings` is mapped
# directly; the earlier pivot_longer(innings) only renamed it to `value`.
# Point size and text labels are taken straight from df_2019_p1$n_wickets, so
# they rely on the plotted rows being in the same order as that table.
df_2019_p1 %>%
  ggplot(aes(x = binned_overs, y = runs, fill = factor(innings))) +
  geom_col(position = "dodge") +
  scale_x_discrete(breaks = c("[0,10)", "[10,20)", "[20,30)", "[30,40)", "[40,50]"),
                   labels = c("1-10", "11-20", "21-30", "31-40", "41-50")) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  labs(title = "",
       x = "Overs",
       y = "Runs Scored",
       fill = "Innings") +
  geom_point(aes(y = runs),
             position = position_dodge(width = 0.85),
             size = df_2019_p1$n_wickets * 5, alpha = 0.2) +
  geom_text(vjust = 0, hjust = 0.64, label = df_2019_p1$n_wickets,
            position = position_dodge(width = 0.80),
            size = 3)

Code
################ Plot Type 1 #####################



################ Plot Type 2 #####################

# Plot 1
# Rows of the 2019 final where innings is 2.
# NOTE(review): New Zealand batted first in the 2019 final, so innings 2 should
# be England batting; the two plot titles in this section were swapped
# accordingly. Confirm against the batting side recorded in the data.
inning_2 <- icc_2019[icc_2019$innings == 2, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2019_p2a <- inning_2 %>%
  group_by(binned_overs, striker, bowler) %>%
  summarise(runs_scored = sum(runs_off_bat))

ggplot(df_2019_p2a, aes(y = bowler, x = runs_scored, fill = binned_overs)) +
  geom_col() +
  facet_wrap(~striker) +
  xlab("Runs Scored") +
  ylab("Bowler") +
  ggtitle("Runs scored by England Batsmen in front of New Zealand Bowlers")

Code
# Plot 2
# Rows of the 2019 final where innings is 1 (see the review note on innings 2).
inning_1 <- icc_2019[icc_2019$innings == 1, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2019_p2b <- inning_1 %>%
  group_by(binned_overs, striker, bowler) %>%
  summarise(runs_scored = sum(runs_off_bat))

ggplot(df_2019_p2b, aes(y = bowler, x = runs_scored, fill = binned_overs)) +
  geom_col() +
  facet_wrap(~striker) +
  xlab("Runs Scored") +
  ylab("Bowler") +
  ggtitle("Runs scored by New Zealand Batsmen in front of England Bowlers")

Code
################ Plot Type 2 #####################
####################### ICC 2019 Final ######################

3.5 ICC 2023 World Cup (Men’s) Final

Code
####################### ICC 2023 Final ######################
####### Preprocessing #######
# Load the 2023 final data and drop the match_id column.
icc_2023 <- read.csv("Data_ICC_2023_F.csv")
icc_2023[["match_id"]] <- NULL

# Blank strings in the dismissal columns become NA.
icc_2023$wicket_type <- replace(icc_2023$wicket_type,
                                icc_2023$wicket_type == "", NA)
icc_2023$player_dismissed <- replace(icc_2023$player_dismissed,
                                     icc_2023$player_dismissed == "", NA)

# `over` is the whole-number part of `ball`.
icc_2023$over <- floor(icc_2023[["ball"]])

# Left-closed bins of width 10 across 0 to 50; the last bin also includes 50.
icc_2023$binned_overs <- cut(icc_2023$over,
                             breaks = seq(from = 0, to = 50, by = 10),
                             right = FALSE,
                             include.lowest = TRUE)
over_intervals <- icc_2023$binned_overs
####### Preprocessing #######

################ Plot Type 1 #####################
# Per over bin and innings: total runs (off the bat plus extras) and the
# number of rows with a non-NA wicket_type.
df_2023_p1 <- icc_2023 %>%
  group_by(binned_overs, innings) %>%
  summarize(runs = sum(runs_off_bat) + sum(extras),
            n_wickets = sum(!is.na(wicket_type)))

# Dodged bars of runs per over bin, one bar per innings. `innings` is mapped
# directly; the earlier pivot_longer(innings) only renamed it to `value`.
# Point size and text labels are taken straight from df_2023_p1$n_wickets, so
# they rely on the plotted rows being in the same order as that table.
df_2023_p1 %>%
  ggplot(aes(x = binned_overs, y = runs, fill = factor(innings))) +
  geom_col(position = "dodge") +
  scale_x_discrete(breaks = c("[0,10)", "[10,20)", "[20,30)", "[30,40)", "[40,50]"),
                   labels = c("1-10", "11-20", "21-30", "31-40", "41-50")) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  labs(title = "",
       x = "Overs",
       y = "Runs Scored",
       fill = "Innings") +
  geom_point(aes(y = runs),
             position = position_dodge(width = 0.85),
             size = df_2023_p1$n_wickets * 5, alpha = 0.2) +
  geom_text(vjust = 0, hjust = 0.64, label = df_2023_p1$n_wickets,
            position = position_dodge(width = 0.80),
            size = 3)

Code
################ Plot Type 1 #####################



################ Plot Type 2 #####################

# Plot 1
# Rows of the 2023 final where innings is 2.
inning_2 <- icc_2023[icc_2023[["innings"]] == 2, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2023_p2a <- summarise(
  group_by(inning_2, binned_overs, striker, bowler),
  runs_scored = sum(runs_off_bat)
)

# One facet per striker, one bar per bowler, filled by over bin.
ggplot(df_2023_p2a) +
  aes(x = runs_scored, y = bowler, fill = binned_overs) +
  geom_col() +
  facet_wrap(vars(striker)) +
  labs(x = "Runs Scored",
       y = "Bowler",
       title = "Runs scored by Australian Batsmen in front of Indian Bowlers")

Code
# Plot 2
# Rows of the 2023 final where innings is 1.
inning_1 <- icc_2023[icc_2023[["innings"]] == 1, ]

# Runs off the bat for each over bin / striker / bowler combination.
df_2023_p2b <- summarise(
  group_by(inning_1, binned_overs, striker, bowler),
  runs_scored = sum(runs_off_bat)
)

# One facet per striker, one bar per bowler, filled by over bin.
ggplot(df_2023_p2b) +
  aes(x = runs_scored, y = bowler, fill = binned_overs) +
  geom_col() +
  facet_wrap(vars(striker)) +
  labs(x = "Runs Scored",
       y = "Bowler",
       title = "Runs scored by Indian Batsmen in front of Australian Bowlers")

Code
################ Plot Type 2 #####################
####################### ICC 2023 Final ######################